import hashlib, json, requests, time, re, pymysql

from lxml import etree

# TODO: MySQL configuration
# These module-level settings are passed to pymysql.connect() in mysql().
host = "127.0.0.1"  # server address
users = "root"      # login user name
pwd = ""            # login password
port = 3306         # server TCP port
dbs = "python"      # database (schema) name

def mysql(id, user_name, level, detail, create_time):
    """
    Insert one comment row into the ``mafengwo`` table.

    A new connection is opened on every call using the module-level
    settings (``host``, ``users``, ``pwd``, ``port``, ``dbs``). The insert is
    best-effort: any failure is rolled back and printed rather than raised.
    The cursor and the connection are always released before returning.

    :param id: user id
    :param user_name: user name
    :param level: user level
    :param detail: comment text
    :param create_time: publication time
    :return: None
    """
    row = (id, user_name, level, detail, create_time)
    # Values are bound through placeholders, never interpolated into the SQL.
    sql = "INSERT INTO mafengwo(id,user_name,level,detail,create_time) values(%s,%s,%s,%s,%s)"
    db = pymysql.connect(host=host, user=users, passwd=pwd, port=port, db=dbs)
    try:
        # The context manager closes the cursor even if execute() fails.
        with db.cursor() as cursor:
            cursor.execute(sql, row)
        db.commit()
        print('success!')
    except Exception as e:
        db.rollback()
        print("error.", e)
    finally:
        # Close in ``finally`` so the connection is not leaked when the
        # rollback itself raises.
        db.close()


def get_params(page, poi_id):
    """
    Build the signed query parameters for the comment-list request.

    The base parameters are serialised as compact JSON (no spaces after
    separators), a fixed salt is appended, and the MD5 hex digest of that
    string is computed. Characters 2 through 11 of the digest are stored
    under the ``_sn`` key.

    :param page: page number
    :param poi_id: hotel id
    :return: dict of request parameters including ``_ts`` and ``_sn``
    """
    # Key order matters: it determines the JSON string that gets hashed.
    query = {
        "_ts": str(int(time.time() * 1000)),  # current time in milliseconds
        "keyword_id": "0",
        "page": f"{page}",
        "poi_id": f"{poi_id}",
        "type": "0",
    }
    compact_json = json.dumps(query, separators=(',', ':'))
    signed_text = compact_json + "c9d6618dbc657b41a66eb0af952906f1"
    digest = hashlib.md5(signed_text.encode()).hexdigest()
    # Only a 10-character slice of the digest is used as the signature.
    query["_sn"] = digest[2:12]
    return query


def down(page, pid, timeout=10):
    """
    Fetch one page of hotel comments and return its HTML fragment.

    Sends a GET request to the comment-list endpoint with the parameters
    produced by ``get_params`` and returns the value stored under the
    ``"html"`` key of the JSON response.

    :param page: page number
    :param pid: hotel id
    :param timeout: seconds to wait for the server before giving up;
        without a timeout ``requests`` may block indefinitely
    :return: the HTML markup returned by the server
    :raises requests.RequestException: on network failure, timeout, or an
        HTTP error status
    """
    url = "https://www.mafengwo.cn/hotel/info/comment_list"
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.41",
    }
    params = get_params(page, pid)
    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    # Fail with a clear HTTP error instead of a confusing JSON/KeyError later.
    response.raise_for_status()
    return response.json()["html"]


def func(res):
    """
    Parse the comment HTML and store every comment in the database.

    Each ``div`` with class ``comm-item _j_comment_item`` is treated as one
    comment; its user name, level, user id, text and time are extracted and
    passed to ``mysql``.

    :param res: HTML markup as text
    :return: None
    """
    # Nothing to parse for empty input.
    if not res:
        return
    tree = etree.HTML(res)
    if tree is None:
        return
    for div in tree.xpath('//div[@class="comm-item _j_comment_item"]'):
        user_name = ''.join(div.xpath('div[@class="user"]/a[@class="name"]/text()'))
        level = ''.join(div.xpath('div[@class="user"]/a[@class="LV"]/text()'))
        # Evaluate the avatar link once and tolerate comments without one;
        # indexing [0] on an empty result would raise IndexError.
        hrefs = div.xpath('div[@class="user"]/a[@class="avatar"]/@href')
        href = hrefs[0] if hrefs else ''
        if href and href != 'javascript:;':
            # The user id is the ``u=`` query value in the avatar link.
            user_id = ''.join(re.findall(r"u=(.*?)&", href))
        else:
            # A placeholder link carries no id.
            user_id = ""
        # Strip U+1F31F characters from the text
        # (presumably because the table cannot store them -- TODO confirm).
        detail = ''.join(div.xpath('div[@class="txt"]/text()')).replace('\U0001f31f', '')
        create_time = ''.join(div.xpath("div[@class='comm-meta']/span[@class='time']/text()"))
        mysql(user_id, user_name, level, detail, create_time)


if __name__ == '__main__':
    # Script entry point: download page 1 of the comments for hotel 7091472,
    # then parse the returned HTML and store each comment.
    res = down(page=1, pid=7091472)
    func(res)
